/* *********************************************************
Program to get descriptives by passing status for "any remediation" sample
Note: earlier Table 1 created in analysis.do


This version makes 2 changes to version "2": 
	1. Restricts the descriptive sample to scores >= -100 (the code uses minscore>=-100)
	2. Fixes earnings variables for later years
********************************************************** */

* ---- Session setup ----
* Close any log that is still open (cap ignores the error when none is),
* clear the data in memory and drop all user-defined programs.
cap log close
clear
program drop _all
set linesize 155

* Path globals used for every file read or written below.
* NOTE(review): d1 is built from the global col_remediation, which is not
* defined anywhere in this file - presumably set by a profile or wrapper
* script. Confirm; if it is empty, d1 becomes a relative path.
global fmartorell_home="/mnt/data/tsp/users/fmartorell/"
*cap do ${fmartorell_home}top_program
global d1="${col_remediation}program/program_paco/publication/"
* Run the shared top_program script; cap suppresses any error it raises,
* including the file not being found.
cap do ${d1}do/top_program

set mem 1000m

* ---- Build the person-level earnings extract (tmp/tempearn) ----
*use ${fmartorell_home}remediation/data/taspearn_allyears, clear
use ${d1}data/taspearn_allyears, clear

* Cohorts from tspyr 198 semester 2 onward get no year-7 values:
* every year-7 measure is set to missing for them.
local late7 "(tspyr>198 | (tspyr==198 & firstsem>1))"
replace earn_yr7=. if `late7'

* For years 5-7: indicator for positive earnings, indicator for positive
* earnings while not enrolled, and earnings restricted to each group.
foreach y of numlist 5 6 7 {
    gen byte posearn`y' = (earn_yr`y')>0 & earn_yr`y'<.
    gen byte pern_nen`y' = posearn`y'==1 & enrolled`y'==0
    gen earn_yr`y'_pos = earn_yr`y' if posearn`y'==1
    gen earn_yr`y'_pnen = earn_yr`y' if pern_nen`y'==1
}
gen byte allearn=1

* The indicators above are 0 (not missing) when earn_yr7 is missing, so the
* derived year-7 variables are blanked out for the late cohorts as well.
foreach v of varlist posearn7 pern_nen7 earn_yr7_pos earn_yr7_pnen {
    replace `v'=. if `late7'
}

* Keep one record per person and save for the merge onto the main file.
keep altpid allearn posearn5 pern_nen5 posearn6 pern_nen6 posearn7 pern_nen7 earn_yr*
sort altpid
by altpid: assert _N==1
*save ${fmartorell_home}tmp/tempearn, replace
save ${d1}tmp/tempearn, replace


* ---- Load the main analysis file and attach the earnings extract ----
*use ${fmartorell_home}remediation/data/tasp192_200_withall, clear
use ${d1}data/tasp192_200_withall, clear
*keep if uniform()<.01
* Sorts by altpid (required by the old-style merge below) and verifies that
* altpid identifies a single record in the master data.
bysort altpid: assert _N==1
*merge altpid using ${fmartorell_home}tmp/tempearn, unique
merge altpid using ${d1}tmp/tempearn, unique
* No record may exist only in the earnings extract.
assert _merge!=2

/* ----------------------------------------------------------------------------------------------
Create covariates and final outcome variables
----------------------------------------------------------------------------------------------- */
*Variables for dropping out after first semester
* inreport1 = 1 when the student has a valid report match in the semester in
* which they first enrolled (firstsem), checked in the report for their own
* sector: oldjrvalidmatch when sr==0, srvalidmatch when sr==1.
* NOTE(review): cap is presumably used because not every semester-specific
* match variable exists in the data - confirm.
gen byte inreport1=0
forvalues sem=1/4 {
 qui replace inreport1=1 if firstsem==`sem' & sr==0 & oldjrvalidmatch_r1_0_`sem'==1
 cap replace inreport1=1 if firstsem==`sem' & sr==1 & srvalidmatch_r1_0_`sem'==1
}
tab inreport1
* Second pass: also accept a match in either sector's report for the first
* semester. The parentheses are required because & binds more tightly than |;
* without them any srvalidmatch for semester sem set the flag regardless of
* the student's firstsem.
forvalues sem=1/4 {
 cap replace inreport1=1 if firstsem==`sem' & (oldjrvalidmatch_r1_0_`sem'==1 | srvalidmatch_r1_0_`sem'==1)
}
tab inreport1
assert oldjrvalidmatch_r1_0_4==jrvalidmatch_r1_0_4

*Distance and tuition variables
* Each source variable gets a missing-value flag plus an "nm" copy in which
* system-missing values are recoded to 0.
gen byte distmiss = (distance==.)
gen byte distl25nm = cond(distless25==., 0, distless25)
gen byte distm50nm = cond(distmore50==., 0, distmore50)

gen byte tutmiss = (indist==.)
gen indistnm = cond(indist==., 0, indist)


* Race/ethnicity indicators from the ethnic_rep2 codes
gen byte white = (ethnic_rep2==5)
gen byte hisp  = (ethnic_rep2==4)
gen byte black = (ethnic_rep2==3)

* First enrolled in semester 1
gen byte startfall = (firstsem==1)

* objectiv code 4 flags the seekbac group
gen byte seekbac = (objectiv==4)

* Highest value of newhg across the jr and sr records. Stata's max() ignores
* a missing argument, so maxnewhg is missing only when both are missing.
gen byte maxnewhg=max(newhgjr,newhgsr)
* Threshold indicators (value >= 1, 2, 3, 4).
* NOTE(review): Stata treats missing as larger than any number, so these
* indicators equal 1 when the source variable is missing - confirm that is
* acceptable for the samples in which they are used.
forvalues i=1/4 {
 gen byte newhgsr`i'=(newhgsr>=`i')
 gen byte maxnewhg`i'=(maxnewhg>=`i')
}
gen byte newhgjr1=newhgjr>=1
gen byte newhgjr2=newhgjr>=2

* For sr==1 students only: at least 15 jr credits (with and without the
* _gpa variant). Undefined (missing) for sr==0.
gen byte oldtrandwn=jrcredit_ac>=15 if sr==1
gen byte oldtrandwn_gpa=jrcredit_ac_gpa>=15 if sr==1
rename oldtranup_gpa tranup_gpa

* Age in whole years at first enrollment: months between birth (y_rep2, m_rep2)
* and month 9 of the entry year, divided by 12 and rounded. The entry year is
* tspyr+1800-1 when firstsem==1 and tspyr+1800 otherwise.
gen byte agefirstenr=(round((((tspyr-1+1800)*12+9)-(y_rep2*12+m_rep2))/12)) if firstsem==1
replace agefirstenr=(round((((tspyr+1800)*12+9)-(y_rep2*12+m_rep2))/12)) if firstsem!=1
* Non-traditional age: 21 or older at first enrollment (missing if age missing).
gen byte nontradage=agefirstenr>=21 if agefirstenr<.

* Constant used to select the full sample.
gen byte all=1

* Academic year of the admindate_y1/admindate_m1 date: months 1-8 belong to
* the calendar year, months 9 and later roll into the following year.
* Years before 1991 are bottom-coded to 1991.
* NOTE(review): admindate_*1 is presumably the first test administration
* date - confirm against the data documentation.
gen acadyr_nes=admindate_y1 if admindate_m1<=8
replace acadyr_nes=admindate_y1+1 if admindate_m1>=9
replace acadyr_nes=1991 if acadyr_nes<1991
tab acadyr_nes
* Academic-year dummies; created in year order because later code refers to
* them with the range dacyr1992-dacyr2000.
forvalues y=1992/2000 {
 gen dacyr`y'=acadyr_nes==`y'
}

* Term within the academic year: 1 = Sep-Dec, 2 = Jan-May, 3 = Jun-Aug
gen byte acadterm=1*inlist(admindate_m1,9,10,11,12)+2*inlist(admindate_m1,1,2,3,4,5)+3*inlist(admindate_m1,6,7,8)
* Number of terms (3 per year) between that date and first enrollment
gen byte diff=(tspyr+1800-acadyr_nes)*3+(firstsem-acadterm)
* Delay indicators: at least two terms / at least one term.
* delay1 previously used the same >=2 cutoff as delay2 (copy-paste error),
* which made the two variables identical.
gen byte delay2=diff>=2
gen byte delay1=diff>=1


* Blank out the raw math and reading scores unless the matching status
* code is "S".
replace rawmth=. if !(mscode=="S")
replace rawred=. if !(rscode=="S")

* Math grade conditional on having one of the codes 1-5; all other codes are
* left missing. The count reports how many records have no such grade.
gen byte fgrmth_cndl = fgrademath if inlist(fgrademath,1,2,3,4,5)
count if fgrmth_cndl==.
* Reverse the scale so that A=4, B=3, etc
replace fgrmth_cndl = 5-fgrmth_cndl
* Any recorded math grade counts as an attempt
gen byte attemptmath = !(fgrademath==.)

* Diagnostic: first-semester school differs from the original school
count if sem1sch!=orig_sch

* Higher of the math and reading scores
gen rsc_hgh = max(rsc_math,rsc_read)

* Economic disadvantage: flag missing values, collapse to a 0/1 indicator,
* then set the flagged (missing) cases to 0.
gen misecon = (econ_dis==.)
replace econ_dis = (econ_dis>0)
replace econ_dis = 0 if misecon==1
summarize econ_dis

* Cohort dummies d192 ... d199, created in order because later code refers
* to them with the range d192-d199.
foreach cohort of numlist 192/199 {
    gen byte d`cohort' = (tspyr==`cohort')
}

* Early cohorts: tspyr 195 or before
gen early = (tspyr<=195)

* Credits combined across the sr and jr records: larger of the two, and sum
gen maxcredit_ac  = max(srcredit_ac, jrcredit_ac)
gen sumcredit_ac  = srcredit_ac + jrcredit_ac
gen maxcredit_gpa = max(srcredit_ac_gpa, jrcredit_ac_gpa)
gen sumcredit_gpa = srcredit_ac_gpa + jrcredit_ac_gpa


* Graduation within 4, 5 and 6 years. For sr==0 either the sr or the jr flag
* counts; for sr==1 only the sr flag counts. The same variable name is used
* for both groups for programming ease.
forvalues k=4/6 {
    gen byte srjrgrwin`k' = (srgrwin`k'==1 | jrgrwin`k'==1) if sr==0
    replace srjrgrwin`k' = (srgrwin`k'==1) if sr==1
}


* Running-variable name for each sample type
global m "rsc_math"
global r "rsc_read"
global any "minscore"
global anyr "minscore"
global anym "minscore"


/* --------------------------------------------------------------------------------------------------
Sample selection
Each keep below narrows the data in memory; the count and tab commands
report the sample size or composition at that point.
-------------------------------------------------------------------------------------------------- */
count

*** Keep if degree seeking ***
* The criterion differs by sector: seekdeg_undec for sr==0, seekbac for sr==1.
keep if (seekdeg_undec==1 & sr==0) | (seekbac==1 & sr==1)   

*Drop records where student not initially a freshman
* freshman is taken from the initial type variable of the student's own
* sector and stays missing when that variable is missing, so those records
* are dropped as well.
gen freshman=initsrtype==1 if sr==1 & initsrtype<.
replace freshman=initjrtype==1 if sr==0 & initjrtype<.
tab freshman sr, missing
keep if freshman==1

*Drop observations with missing data
keep if ethnic_rep2!=. & sr!=. & nontradage!=. 

count

*Drop students who according to report2 are exempt from the TASP requirements
keep if notexempt==1

*Drop records that don't match to NES
tab nes_match sr, missing row col
keep if nes_match==1

*Drop students who didn't take the TASP within 1 semester of starting college
keep if win1sem==1
summ diff, detail

*Drop records where the math or reading score is not valid
* Both scores must lie in the range -130 to 80.
keep if (rsc_math>=-130 & rsc_math<=80) & (rsc_read>=-130 & rsc_read<=80)


*Identify analysis samples
* Variable names are written out in full (the earlier rsc_m / rsc_rea
* abbreviations break under "set varabbrev off" or when another variable
* with the same prefix is added to the data).

* allvalid: math, reading and writing scores all within -130 to 80
gen byte allvalid=(rsc_math>=-130 & rsc_math<=80) & (rsc_read>=-130 & rsc_read<=80) & (rsc_writ>=-130 & rsc_writ<=80)
* minscore: lower of the math and reading scores
gen minscore=min(rsc_math,rsc_read)

* "Any remediation" sample: all three scores valid and writing score >= 0
gen byte anysample=allvalid==1 & rsc_writ>=0
* Sub-samples by which subject supplies the minimum score
gen byte anymsample=anysample==1 & minscore==rsc_math
gen byte anyrsample=anysample==1 & minscore==rsc_read /* note: there will be some overlap between m and r */

* Subject-specific samples: valid score in that subject
gen byte msample=(rsc_math>=-130 & rsc_math<=80)
gen byte rsample=(rsc_read>=-130 & rsc_read<=80)

* Sample sizes by sector for each sample, using the >= -100 restriction
tab sr if anysample==1 & minscore>=-100


tab sr if msample==1 & rsc_math>=-100
tabstat rsc_read if msample==1 & rsc_math>=-100, by(sr) stat(mean sd)
tab sr if msample==1 & inrange(rsc_math,-10,10)

tab sr if rsample==1 & rsc_read>=-100
tabstat rsc_math if rsample==1 & rsc_read>=-100, by(sr) stat(mean sd)
tab sr if rsample==1 & inrange(rsc_read,-10,10)

keep if anysample==1 | msample==1 | rsample==1






/* --------------------------------------------------------------------------------------
Calculate sample means by remediation status
--------------------------------------------------------------------------------------- */
count if sr==.

* Credits in the first year-0 record of the student's own sector; must be
* present for everyone who started in semester 1.
gen firstsemcr=srsch_yr0 if sr==1
replace firstsemcr=jrsch_yr0 if sr==0
assert firstsemcr!=. if startfall==1

* Running variable (minscore), its square and cube, the passing indicator
* (score >= 0) and the interactions of passing with each polynomial term.
gen x=minscore
gen x2=x^2
gen x3=x^3
gen pass=x>=0
gen inter=pass*x
gen inter2=pass*x2
gen inter3=pass*x3

* Transfer outcome: tranup for sr==0, trandwn for sr==1
gen transfer=tranup if sr==0
replace transfer=trandwn if sr==1
assert transfer!=.

* Subject-level passing indicators (score >= 0). Variable names are written
* out in full rather than relying on the rsc_m / rsc_r abbreviations, which
* break under "set varabbrev off" or when a similarly named variable is added.
gen byte passwrt=rsc_writ>=0
gen byte passmath=rsc_math>=0
gen byte passread=rsc_read>=0

* Diagnostic: distance missing should coincide with distless25 missing
count if distmiss!=(distless25==.)
global covs="distmiss distl25nm distm50nm indistnm white hisp startfall nontradage d192-d199 econ_dis misecon dacyr1992-dacyr2000 delay2"


* Non-binary variables, for which standard deviations are also reported
global non01="firstsemcr fgrmth_cndl sumcredit_ac rsc_math rsc_read rsc_hgh minscore earn_yr7_pnen"


* ---- Descriptive statistics table, written to log/descstats.log ----
* For each sector (sr = 0, 1) and each variable, one comma-separated row:
*   variable, overall mean, mean for anydeved_2sem group 2, mean for group 1,
*   t-statistic for the difference, number of observations in the t-test.
* Sample: anysample==1, win1sem==1 and minscore>=-100.
* NOTE(review): the Remediated / Non-Rem labels assume anydeved_2sem is coded
* 0/1 so that ttest group 2 is the remediated group - confirm.
* Variables listed in the global non01 get a second row with the combined
* and group-specific standard deviations.

* Expand the non-binary variable list once, outside the loops.
unab non01vars : $non01

* Start a fresh log containing only the header line.
*cap log using ${fmartorell_home}remediation/programs/descstats.log, replace
cap log using ${d1}log/descstats.log, replace
disp "Var" _skip(8) "Total" _skip(8) "Remediated" _skip(8) "Non-Rem" _skip(8) "t-stat" _skip(8) "N"
cap log close

forvalues sr=0/1 {
 *qui log using ${fmartorell_home}remediation/programs/descstats.log, append
 qui log using ${d1}log/descstats.log, append
 disp "------------------SR=`sr'--------------------"
 foreach var of varlist demath_2sem deread_2sem dewrit_2sem pass firstsemcr sumcredit_ac attemptmath passcollmath fgrmth_cndl transfer maxnewhg1 maxnewhg2 /*
 */ maxnewhg3 maxnewhg4 srjrgrwin4 srjrgrwin5 srjrgrwin6 posearn5 posearn6 posearn7 white econ_dis misecon nontradage distl25nm distm50nm distmiss indistnm early rsc_hgh minscore startfall rsc_math rsc_read {

  * 1 when the variable is in the non-binary list. Computed before the
  * estimation commands so their r() results stay intact for display.
  local iscont : list var in non01vars

  qui summ `var' if sr==`sr' & anysample==1 & minscore>=-100 & win1sem==1
  local mean=r(mean)
  qui ttest `var' if sr==`sr' & anysample==1 & win1sem==1 & minscore>=-100, by(anydeved_2sem)

  * ttest returns r(N_1) and r(N_2) but no r(N); the previous r(N) always
  * printed a missing value, so the two group sizes are summed instead.
  disp "`var'" "," `mean' "," r(mu_2) "," r(mu_1) "," r(t) "," r(N_1)+r(N_2)

  if `iscont' {
   disp " ,"  r(sd) "," r(sd_2) "," r(sd_1) ","
  }
 }
  cap log close
}


